Skip to contents
## library(devtools)
## library(BiocManager)
library(SingleCellExperiment)
library(ggplot2)
library(scFeatures) ## devtools::install_github("SydneyBioX/scFeatures")
library(ClassifyR) ## BiocManager::install("ClassifyR", dependencies = TRUE)
library(lisaClust)
library(ggthemes)
library(spicyR) ## BiocManager::install("spicyR")
library(dplyr)
library(limma)
library(plotly)
library(scattermore)
library(tidyr)
library(survival)
library(survminer)
library(spatstat)
library(scater)
library(scran)
library(reshape)

theme_set(theme_classic())
# code for plotting purpose
plot_boxplot <- function( feature ){
  
  data_plot <- t(feature)
  data_plot <- melt(data_plot )
  
  colnames(data_plot) <- c("X1", "X2", "value")
  data_plot$condition <- unlist( lapply( strsplit(  as.character( data_plot$X2), "_cond_"), `[`, 2))
  
  p <- ggplot(data_plot, aes( x = X1, y = value , colour = condition)) + 
    geom_boxplot()  + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
   
  return(p)
}
  

plot_barplot <- function(data , dodge=F  ){

  data$patient <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 1))
  data$condition <- unlist( lapply( strsplit( rownames(data ), "_cond_"), `[`, 2))
  
  data <- as.data.frame( melt(data, id=c("patient", "condition")) )
 
  p <-   ggplot(data , aes( x = patient , y = value , fill = variable) ) +   
    geom_bar(stat="identity"   ) + facet_wrap(~condition, scale="free") + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) 
 
 return (p)
}


 
draw_dotplot <- function(data_sce, sample, celltype , group , group_of_interest ){

  df <- data.frame(colData( data_sce))
  # for each patient, calculate the proportion of each cell type in each region.  

  df_plot <- NULL
  for ( thispatient in unique(df[[sample]])){
    this_df <- df[df[[sample]]== thispatient, ]
    temp_df <-   table( this_df$region, this_df[[celltype]] )
    temp_df <-  temp_df / rowSums(temp_df)
     temp_df <- data.frame(  temp_df)
    temp_df$patient <-  thispatient
    temp_df$group <- clinical[clinical[[sample]] == thispatient,  group ]  
    df_plot <- rbind(df_plot, temp_df)
  }
  
  # for each region, calculate the average proportion of each cell type across all individuals 
  df_plot <- df_plot %>% dplyr::group_by( Var1 , Var2, group) %>% 
    summarise(mean_proportion = mean(Freq))
    
  # we are only interested in the short term and long term survival individuals 
  df_plot  <- df_plot [ df_plot$group %in% group_of_interest, ]
   
  p <- ggplot(df_plot, aes(y = Var2, x = Var1 ,colour =mean_proportion  , size = mean_proportion ))+  
    geom_point() + 
    facet_grid(~group, scales = "free", space = "free" ) + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
    xlab("Region" ) + ylab("Cell type") + scale_colour_viridis_c()
  
  return(p)
}

draw_region_clustering_result <- function( data_sce, sample , selected_sample   ){
  # get meta data for a selected patient to visualise 
  metadata <- colData(data_sce)
  metadata <- metadata[metadata[[sample]] == selected_sample ,  ]
  metadata <- data.frame(metadata)

  
  plotlist <- list() # define the list to store images for region highlighting 
  plotlist_celltype <- list() # define the list to store images for cell-type highlighting 
   
  # optional: define a colour palette
  # you can also specify your own colour to use in the variable color_codes 
  tableau_palette <- scale_colour_tableau( palette = "Tableau 20") 
  color_codes <- tableau_palette$palette(18)
  color_codes <- c(color_codes, "darkgrey")  # for all other regions apart from region of interest, make the colour grey 
  names(color_codes) <- c(unique(metadata$description) ,  "other regions")
  
  
  # look through each region to highlight the region of interest, as well as the cell type in the region of interest
  for ( thisregion in sort(unique(metadata$region))){
      
        # select the region of interest
        selected_region_index <-  metadata$region == thisregion
        
        # for all other regions, define them as "other regions" so that they will be greyed out 
        metadata$selected_region <-  "other regions"
        metadata$selected_region[selected_region_index] <- "selected region"
        
        # for all cell types outside the region of the interest, also make them greyed out 
        metadata$celltype <- metadata$description
        metadata$celltype[!selected_region_index ] <-   "other regions"
        metadata$celltype <- factor(metadata$celltype, levels = c(unique(metadata$description), "other regions"))

        # plot ggplot highlighting the region of interest
       p <- ggplot(metadata, aes(x = Location_Center_X , y = Location_Center_Y , colour = selected_region  )) + geom_point(alpha = 0.7) + ggtitle(thisregion) + scale_colour_manual(values = c("grey" , "red"))
         
       # plot ggplot highlighting the cell-types in the region of interest 
       p2 <-  ggplot(metadata, aes(x = Location_Center_X , y = Location_Center_Y , colour =  celltype )) + geom_point(alpha = 0.7 ) + ggtitle(thisregion) + scale_colour_manual(values =  color_codes)
       
      plotlist [[thisregion ]] <- p
       
      plotlist_celltype [[thisregion ]] <- p2
      
  }
  
  a <- ggarrange(plotlist = plotlist , ncol = 5, nrow = 1 , common.legend = T )
  b <- ggarrange(plotlist = plotlist_celltype , ncol = 5, nrow = 1 , common.legend = T )
  
  return (list(a,b))
}


draw_selected_region_boxplot <- function(data_sce, sample , celltype, group ,  group_of_interest, select_region){
  
    df <- clinical[colData(data_sce)[, sample ], ]
   df$region <- data_sce$region
   df[[celltype]] <- data_sce[[celltype]]
  
   df_plot <- NULL
   for ( thispatient in unique(df[[sample]])){
      this_df <- df[df[[sample]] == thispatient, ]
      temp_df <-   table( this_df$region, this_df[[celltype]] )
      temp_df <-  temp_df / rowSums(temp_df)
      temp_df <- data.frame(  temp_df)
      temp_df$patient <-  thispatient
      temp_df$group <- unique( this_df[[group]])
      df_plot <- rbind(df_plot, temp_df)
    }
    
     
  df_plot_region <- df_plot[df_plot$Var1 == select_region , ]
  
    
  df_plot_region  <-  df_plot_region [ df_plot_region$group %in% group_of_interest, ]
   
  p <- ggplot(df_plot_region,  aes(x =  Var2,  y = Freq, colour = group)) +
    geom_boxplot()+ 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))  + 
    ylab("Proportion") + xlab("Cell type") + ggtitle("Selected region") + ylim(0, 0.25)
   
  
  return(p)
 
}

Overview

The recent development of spatial omics allows us to examine the spatial organisation of cells within tissues in their native environment. This workshop will discuss the challenges and analytical skills associated working with multi-sample spatial datasets and their use in disease risk prognosis. We will also introduce general analytical strategies and critical thinking questions that arise in the workflow.


Preparation and assumed knowledge

Learning Objectives

  • Describe and visualise spatial omics datasets
  • Calculate different spatial statistics at the cell-type level using scFeatures
  • Perform multi-view disease outcome prognosis with the package ClassifyR
  • Understand and perform cell segmentation using BIDCell
  • Develop an understanding of:
    • evaluation of classification and survival models
    • evaluate cohort heterogeneity given a survival model
  • Explore various strategies for disease outcome prognosis using spatial omics data

Note: This data workshop offers participants the opportunity to engage in hands-on analysis using R. However, if you are not comfortable with coding in R, you can still acquire valuable analytical skills by reviewing the output we provide in this file.

Time outline

Structure of the 3-hour workshop:

Activity Time
Part 1: Introduction 20m
Part 2: Exploring spatial data 20m
Part 3: Feature engineering with scFeatures 30m
Break 15m
Part 4: Survival analysis with ClassifyR 15m
Part 5. Identify cohort heterogeneity 15m
Part 6: Cell segmentation with BIDCell 20m
Concluding remarks

Part 2: Exploring spatial data

Data and background

A subset of the widely-known METABRIC breast cancer cohort has recently been analyzed using imaging mass cytometry: Imaging Mass Cytometry and Multiplatform Genomics Define the Phenogenomic Landscape of Breast Cancer. The question we are asking is: can their risk of recurrence be accurately estimated and therefore inform how aggressively they need to be treated? The other component of the analysis is patient clinical data, which has been sourced from Supplementary Table 5 of Dynamics of Breast-cancer Relapse Reveal Late-recurring ER-positive Genomic Subgroups.

## Quick look at data
assay(data_sce)[1:5, 1:5]
##           MB-0002:345:93 MB-0002:345:107 MB-0002:345:113 MB-0002:345:114
## HH3_total     4.82032390       3.8229411      2.55845170      4.82208870
## CK19          1.21185160       1.3223530      0.13832258      0.33366668
## CK8_18        2.80274720       2.4720588      0.60545164      2.43351100
## Twist         0.14651649       0.1176471      0.09877419      0.20791112
## CD68          0.07863736       0.1016471      0.03225806      0.05237778
##           MB-0002:345:125
## HH3_total      2.41391110
## CK19           0.07055555
## CK8_18         0.20724444
## Twist          0.12004445
## CD68           0.00000000

Basic characteristics of the data objects:
- The dataset contains 38 proteins and 76307 cells.
- The outcome is recurrence-free survival.

Covariates

DT::datatable(data.frame(colData(data_sce)))